NYC Taxi
library(arrow)
##
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
##
## timestamp
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
TLC Trip Record Data
tlc <- read_parquet(file = "~/Documents/data/yellow_tripdata_2022-06.parquet")
str(tlc)
## tibble [3,558,124 × 19] (S3: tbl_df/tbl/data.frame)
## $ VendorID : int [1:3558124] 1 1 2 1 1 2 2 1 2 2 ...
## $ tpep_pickup_datetime : POSIXct[1:3558124], format: "2022-05-31 20:25:41" "2022-05-31 20:44:40" ...
## $ tpep_dropoff_datetime: POSIXct[1:3558124], format: "2022-05-31 20:48:22" "2022-05-31 21:01:48" ...
## $ passenger_count : num [1:3558124] 1 1 1 2 0 1 1 1 1 1 ...
## $ trip_distance : num [1:3558124] 11 4.2 9.49 12.1 1.8 2.02 8.08 4.3 8.78 1.76 ...
## $ RatecodeID : num [1:3558124] 1 1 1 1 1 1 1 1 1 1 ...
## $ store_and_fwd_flag : chr [1:3558124] "N" "N" "N" "N" ...
## $ PULocationID : int [1:3558124] 70 170 264 132 140 148 158 246 197 48 ...
## $ DOLocationID : int [1:3558124] 48 226 113 17 163 158 116 262 191 186 ...
## $ payment_type : int [1:3558124] 1 1 1 2 1 1 1 1 1 1 ...
## $ fare_amount : num [1:3558124] 32 14 26 37 9 9 26.5 15 26.5 7.5 ...
## $ extra : num [1:3558124] 3 3 0.5 1.75 3 0.5 0.5 3 0.5 0.5 ...
## $ mta_tax : num [1:3558124] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
## $ tip_amount : num [1:3558124] 2 0 5 0 2.55 0.64 7.58 3.75 5.56 2.26 ...
## $ tolls_amount : num [1:3558124] 6.55 0 6.55 0 0 0 0 0 0 0 ...
## $ improvement_surcharge: num [1:3558124] 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 ...
## $ total_amount : num [1:3558124] 44.4 17.8 42.6 39.5 15.3 ...
## $ congestion_surcharge : num [1:3558124] 2.5 2.5 2.5 0 2.5 2.5 2.5 2.5 0 2.5 ...
## $ airport_fee : num [1:3558124] 0 0 1.25 1.25 0 0 0 0 0 0 ...
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:arrow':
##
## duration
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
tlc <- tlc %>%
mutate(
date_pickup = date(tpep_pickup_datetime),
date_dropoff = date(tpep_dropoff_datetime),
hour_pickup = hours(tpep_pickup_datetime),
hour_dropoff = hours(tpep_dropoff_datetime),
weekday_pickup = wday(tpep_pickup_datetime, label = TRUE),
day_pickup = day(tpep_pickup_datetime)
)
tlc %>% group_by(weekday_pickup) %>%
tally %>%
ggplot(aes(x = weekday_pickup, y = n)) + geom_bar(stat = "identity")

tlc %>% group_by(weekday_pickup) %>%
summarise(mean_passanger = mean(passenger_count, na.rm = TRUE)) %>%
ggplot(aes(x = weekday_pickup, y = mean_passanger)) + geom_bar(stat = "identity")

tlc %>% group_by(day_pickup) %>%
tally %>%
ggplot(aes(x = day_pickup, y = n)) + geom_line()

Geographic
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
tlc_zone <- st_read("~/Documents/taxi_zones/taxi_zones.shp", quiet = TRUE)
plot(tlc_zone)

tlc_zone <- st_transform(tlc_zone, crs = 4326)
ggplot(tlc_zone) + geom_sf() + theme_inset()

our_neighborhood <- tlc_zone %>%
filter(zone == "Gramercy"|zone == "Kips Bay")
ggplot(tlc_zone) + geom_sf() + theme_inset() +
geom_sf(data = our_neighborhood, fill = "red")

bbox <- st_bbox(tlc_zone) %>% as.numeric
nyc_map <- get_stamenmap(bbox = bbox, messaging = FALSE, zoom = 11,
maptype = "toner-lite", format = c("png"))
## Source : http://tile.stamen.com/toner-lite/11/601/768.png
## Source : http://tile.stamen.com/toner-lite/11/602/768.png
## Source : http://tile.stamen.com/toner-lite/11/603/768.png
## Source : http://tile.stamen.com/toner-lite/11/604/768.png
## Source : http://tile.stamen.com/toner-lite/11/601/769.png
## Source : http://tile.stamen.com/toner-lite/11/602/769.png
## Source : http://tile.stamen.com/toner-lite/11/603/769.png
## Source : http://tile.stamen.com/toner-lite/11/604/769.png
## Source : http://tile.stamen.com/toner-lite/11/601/770.png
## Source : http://tile.stamen.com/toner-lite/11/602/770.png
## Source : http://tile.stamen.com/toner-lite/11/603/770.png
## Source : http://tile.stamen.com/toner-lite/11/604/770.png
## Source : http://tile.stamen.com/toner-lite/11/601/771.png
## Source : http://tile.stamen.com/toner-lite/11/602/771.png
## Source : http://tile.stamen.com/toner-lite/11/603/771.png
## Source : http://tile.stamen.com/toner-lite/11/604/771.png
ggmap(nyc_map) +
geom_sf(data = our_neighborhood, fill = "red", inherit.aes = FALSE)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.



#inherit.aes to use coordinates from data table, not nyc_map
ggmap(nyc_map) + geom_sf(data = joined_tbl, aes(fill = N), inherit.aes = FALSE) +
scale_fill_viridis_c(option = "A")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
